

########## IMPUTATION OF INCOME / EDUCATION PERCENTILE COMBINATIONS ##########

######################
## PACKAGES
######################
library(dplyr)
library(haven)
library(Hmisc)
library(MASS)
library(openxlsx)
library(foreign)

covariances <- read_excel("coveriances income education 1965 to 2014.xlsx")

#######################################################
## STEP 2: Make variance/covariance matrix 
#######################################################
d <- read_excel("Data for Affluence and influence in a social democracy.xlsx")

d <- d %>% dplyr::select(-(pred_i90e90:pred_i10e10)) # removes variables to be calculated if they were already in the dataset

# Calculate total n in each category
d$S_HINC1 <- (d$HINC1_FAV+d$HINC1_OPP)
d$S_HINC2 <- (d$HINC2_FAV+d$HINC2_OPP)
d$S_HINC3 <- (d$HINC3_FAV+d$HINC3_OPP)
d$S_HINC4 <- (d$HINC4_FAV+d$HINC4_OPP)
d$S_HINC5 <- (d$HINC5_FAV+d$HINC5_OPP)
d$S_HINC6 <- (d$HINC6_FAV+d$HINC6_OPP)
d$S_HINC7 <- (d$HINC7_FAV+d$HINC7_OPP)
d$S_HINC8 <- (d$HINC8_FAV+d$HINC8_OPP)
d$S_HINC9 <- (d$HINC9_FAV+d$HINC9_OPP)
d$S_HINC10 <- (d$HINC10_FAV+d$HINC10_OPP)
d$S_HINC11 <- (d$HINC11_FAV+d$HINC11_OPP)
d$S_HINC12 <- (d$HINC12_FAV+d$HINC12_OPP)

d$S_EDU1 <- (d$EDU1_FAV+d$EDU1_OPP)
d$S_EDU2 <- (d$EDU2_FAV+d$EDU2_OPP)
d$S_EDU3 <- (d$EDU3_FAV+d$EDU3_OPP)
d$S_EDU4 <- (d$EDU4_FAV+d$EDU4_OPP)
d$S_EDU5 <- (d$EDU5_FAV+d$EDU5_OPP)
d$S_EDU6 <- (d$EDU6_FAV+d$EDU6_OPP)
d$S_EDU7 <- (d$EDU7_FAV+d$EDU7_OPP)
d$S_EDU8 <- (d$EDU8_FAV+d$EDU8_OPP)
d$S_EDU9 <- (d$EDU9_FAV+d$EDU9_OPP)
d$S_EDU10 <- (d$EDU10_FAV+d$EDU10_OPP)

# Calculate % support in each category 
d$pcFAV_INC1 <- d$HINC1_FAV/(d$HINC1_FAV+d$HINC1_OPP)
d$pcFAV_INC2 <- d$HINC2_FAV/(d$HINC2_FAV+d$HINC2_OPP)
d$pcFAV_INC3 <- d$HINC3_FAV/(d$HINC3_FAV+d$HINC3_OPP)
d$pcFAV_INC4 <- d$HINC4_FAV/(d$HINC4_FAV+d$HINC4_OPP)
d$pcFAV_INC5 <- d$HINC5_FAV/(d$HINC5_FAV+d$HINC5_OPP)
d$pcFAV_INC6 <- d$HINC6_FAV/(d$HINC6_FAV+d$HINC6_OPP)
d$pcFAV_INC7 <- d$HINC7_FAV/(d$HINC7_FAV+d$HINC7_OPP)
d$pcFAV_INC8 <- d$HINC8_FAV/(d$HINC8_FAV+d$HINC8_OPP)
d$pcFAV_INC9 <- d$HINC9_FAV/(d$HINC9_FAV+d$HINC9_OPP)
d$pcFAV_INC10 <- d$HINC10_FAV/(d$HINC10_FAV+d$HINC10_OPP)
d$pcFAV_INC11 <- d$HINC11_FAV/(d$HINC11_FAV+d$HINC11_OPP)
d$pcFAV_INC12 <- d$HINC12_FAV/(d$HINC12_FAV+d$HINC12_OPP)

d$pcFAV_EDU1 <- d$EDU1_FAV/(d$EDU1_FAV+d$EDU1_OPP)
d$pcFAV_EDU2 <- d$EDU2_FAV/(d$EDU2_FAV+d$EDU2_OPP)
d$pcFAV_EDU3 <- d$EDU3_FAV/(d$EDU3_FAV+d$EDU3_OPP)
d$pcFAV_EDU4 <- d$EDU4_FAV/(d$EDU4_FAV+d$EDU4_OPP)
d$pcFAV_EDU5 <- d$EDU5_FAV/(d$EDU5_FAV+d$EDU5_OPP)
d$pcFAV_EDU6 <- d$EDU6_FAV/(d$EDU6_FAV+d$EDU6_OPP)
d$pcFAV_EDU7 <- d$EDU7_FAV/(d$EDU7_FAV+d$EDU7_OPP)
d$pcFAV_EDU8 <- d$EDU8_FAV/(d$EDU8_FAV+d$EDU8_OPP)
d$pcFAV_EDU9 <- d$EDU9_FAV/(d$EDU9_FAV+d$EDU9_OPP)
d$pcFAV_EDU10 <- d$EDU10_FAV/(d$EDU10_FAV+d$EDU10_OPP)

# For-loop that goes through each row in the dataset and estimates imputed support at specified income/education combinations
for(i in 1:nrow(d)){
drow <- d[i,]
# INCOME/SUPPORT COVARIANCE
d1 <- drow %>% dplyr::select(starts_with("S_HINC"))
d1 <- as.data.frame(t(d1))
d1$prop <- d1$V1/sum(d1$V1, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$income <- d1$Var1
d1 <- add_rownames(d1, "income")
names(d1)[names(d1)=="V1"] <- "n"
d1$income <- rownames(d1)
d1 <- dplyr::select(d1, income, score, n)
d2 <- drow %>% dplyr::select(starts_with("pcFAV_INC"))
d2 <- as.data.frame(t(d2)) 
d2$income <- 1:nrow(d2) 
names(d2)[names(d2)=="V1"] <- "sup"
d3 <- merge(d1,d2, by="income")
cor(d3$sup,d3$score,use="complete.obs")
d3<-na.omit(d3)
d4<- d3 %>% dplyr::select(score,sup)
sup_inc_cov<-cov.wt(d4,wt=d3$n)$cov[1,2]
inc_var<-cov.wt(d4,wt=d3$n)$cov[1,1]
inc_mean <- weighted.mean(d3$score,w=d3$n)
# EDUCATION/SUPPORT COVARIANCE
d1 <- drow %>% dplyr::select(starts_with("S_EDU"))
d1 <- as.data.frame(t(d1))
d1$prop <- d1$V1/sum(d1$V1, na.rm=T)
d1$cumu <- cumsum(d1$prop)
d1 <- mutate(d1, score = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
d1$score[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
d1$edu <- d1$Var1
d1 <- add_rownames(d1, "edu")
names(d1)[names(d1)=="V1"] <- "n"
d1$edu <- rownames(d1)
d1 <- dplyr::select(d1, edu, score, n)
d2 <- drow %>% dplyr::select(starts_with("pcFAV_EDU"))
d2 <- as.data.frame(t(d2)) 
d2$edu <- 1:nrow(d2) 
names(d2)[names(d2)=="V1"] <- "sup"
d3 <- merge(d1,d2, by="edu")
cor(d3$sup,d3$score,use="complete.obs")
d3<-na.omit(d3)
d4<- d3 %>% dplyr::select(score,sup)
sup_edu_cov<-cov.wt(d4,wt=d3$n)$cov[1,2]
edu_var<-cov.wt(d4,wt=d3$n)$cov[1,1] 
edu_mean <- weighted.mean(d3$score,w=d3$n)
# SUPPORT VARIANCE
d1 <- drow %>% dplyr::select(OVERALL_FAV,OVERALL_OPP)
d1 <- as.data.frame(t(d1))
d1$sup<-c(1,0)
sup_var <- wtd.var(d1$sup,d1$V1) 
sup_mean <- weighted.mean(d1$sup,w=d1$V1)
# BUILD VARIANCE COVARIANCE MATRIX
sup <- c(sup_var,sup_inc_cov,sup_edu_cov)
inc <- c(sup_inc_cov,inc_var,covariances$cov[covariances$year==drow$ASKED_YEAR])
edu <- c(sup_edu_cov,covariances$cov[covariances$year==drow$ASKED_YEAR],edu_var)
varcovar <- data.frame(sup,inc,edu)
rownames(varcovar) <- c("sup","inc","edu")
varcovar <- data.matrix(varcovar)
varcovar # VARIANCE COVARIANCE MATRIX
means <- c(sup_mean,inc_mean,edu_mean)
data<-as.data.frame(mvrnorm(n=drow$ORG_DATASET_N,means,varcovar,empirical=T)) # Generate dataset based on this var-covar-matrix
m1 <- glm(sup ~ inc + I(inc^2) + edu + I(edu^2), data=data) # Income, income^2, education, and education^2 as IVs, just like Gilens (2012, 93-94)
d4 <- expand.grid(inc=c(0.90,0.70,0.50,0.10),edu=c(0.90,0.50,0.10))
d4$pred <- predict(m1, d4, type="response")
d$pred_i90e90[i] <- d4$pred[1]
d$pred_i70e90[i] <- d4$pred[2]
d$pred_i50e90[i] <- d4$pred[3]
d$pred_i10e90[i] <- d4$pred[4]
d$pred_i90e50[i] <- d4$pred[5]
d$pred_i70e50[i] <- d4$pred[6]
d$pred_i50e50[i] <- d4$pred[7]
d$pred_i10e50[i] <- d4$pred[8]
d$pred_i90e10[i] <- d4$pred[9]
d$pred_i70e10[i] <- d4$pred[10]
d$pred_i50e10[i] <- d4$pred[11]
d$pred_i10e10[i] <- d4$pred[12]
}

############################################################################
############################## END #########################################
############################################################################

write.xlsx(d, "Data for Affluence and influence in a social democracy..xlsx") # Writes out master dataset with the new variables added









